knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
```r
# Install the text-analysis packages used below, but only those that are not
# already available, so re-running the notebook does not reinstall everything.
# Package names must be quoted strings for install.packages().
required.packages <- c(
  "tidytext",
  "textstem",
  "clinspacy",
  "topicmodels",
  "reshape2",
  "stringr"
)
missing.packages <- required.packages[
  !vapply(required.packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.packages) > 0) {
  install.packages(missing.packages)
}

<!-- rnb-source-end -->

<!-- rnb-chunk-end -->


<!-- rnb-text-begin -->



<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeSh0aWR5dGV4dClcbmxpYnJhcnkodGV4dHN0ZW0pXG5saWJyYXJ5KGNsaW5zcGFjeSlcbmxpYnJhcnkodG9waWNtb2RlbHMpXG5saWJyYXJ5KHJlc2hhcGUyKVxubGlicmFyeShzdHJpbmdyKVxuYGBgIn0= -->

```r
library(tidytext)
library(textstem)
library(clinspacy)
library(topicmodels)
library(reshape2)
library(stringr)

###Data Parsing

# Load the MTSamples transcription data set through clinspacy, then print a
# compact column-by-column overview of it.
raw.data <- clinspacy::dataset_mtsamples()
raw.data %>%
  dplyr::glimpse()
Rows: 4,999
Columns: 6
$ note_id           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, …
$ description       <chr> "A 23-year-old white female presents with complaint of allergies.", "Consult for laparoscopic g…
$ medical_specialty <chr> "Allergy / Immunology", "Bariatrics", "Bariatrics", "Cardiovascular / Pulmonary", "Cardiovascul…
$ sample_name       <chr> "Allergic Rhinitis", "Laparoscopic Gastric Bypass Consult - 2", "Laparoscopic Gastric Bypass Co…
$ transcription     <chr> "SUBJECTIVE:,  This 23-year-old white female presents with complaint of allergies.  She used to…
$ keywords          <chr> "allergy / immunology, allergic rhinitis, allergies, asthma, nasal sprays, rhinitis, nasal, ery…

1

###Data Description

##NoteID - This variable is the unique ID for each note.

##Description - This output provides a summary of the SOAP notes.

##Medical_Specialty - This variable refers to the medical speciality the patient is visiting.

##Sample_Name - This variable refers to the procedures undergone for each patient.

##Transcription - This output provides a full transcript of the physician's SOAP notes.

##Keywords - This output collects keywords from the Medical_Speciality, Sample_Name, and Transcription variables.


# Number of distinct medical specialties present in the raw data.
dplyr::n_distinct(raw.data[["medical_specialty"]])
[1] 40

###Transcripts per specialty

# Horizontal bar chart: one bar per specialty, bar length = number of notes.
ggplot2::ggplot(
  data = raw.data,
  mapping = ggplot2::aes(y = medical_specialty)
) +
  ggplot2::geom_bar() +
  ggplot2::labs(x = "Document Count", y = "Medical Speciality")

# Restrict the analysis to the three specialties compared in this notebook.
filtered.data <- dplyr::filter(
  raw.data,
  medical_specialty %in% c("Orthopedic", "Radiology", "Surgery")
)

###Text Processing


# Build a cleaned, one-row-per-note version of the transcriptions:
#   1. split each transcription into word tokens,
#   2. strip every non-alphanumeric character from each token,
#   3. drop tokens that contain a digit,
#   4. drop stop words,
#   5. glue the surviving tokens back into one string per note,
#   6. re-attach the note metadata (all columns except the raw transcription).
# The stop-word join key is stated explicitly so the join does not depend on
# which column names the two tables happen to share.
analysis.data <- filtered.data %>%
  tidytext::unnest_tokens(word, transcription) %>%
  dplyr::mutate(word = stringr::str_replace_all(word, "[^[:alnum:]]", "")) %>%
  dplyr::filter(!stringr::str_detect(word, "[0-9]")) %>%
  dplyr::anti_join(tidytext::stop_words, by = "word") %>%
  dplyr::group_by(note_id) %>%
  dplyr::summarise(transcription = paste(word, collapse = " ")) %>%
  dplyr::left_join(
    dplyr::select(filtered.data, -transcription),
    by = "note_id"
  )
Joining with `by = join_by(word)`
# Single-word (unigram) tokens, one row per token.
tokenized.data.unigram <- tidytext::unnest_tokens(
  analysis.data,
  word,
  transcription,
  to_lower = TRUE
)

# Two-word (bigram) tokens, one row per bigram.
tokenized.data <- tidytext::unnest_tokens(
  analysis.data,
  ngram,
  transcription,
  token = "ngrams",
  n = 2,
  to_lower = TRUE
)

2

###Unique Tokens per Speciality

# Number of distinct unigrams observed within each specialty.
tokenized.data.unigram %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::summarise(n = dplyr::n_distinct(word))

##Unique Unigrams

##There are 7682 unique unigrams in the orthopedic speciality.

##There are 5935 unique unigrams in the radiology speciality.

##There are 11977 unique unigrams in the surgery speciality.


# Number of distinct bigrams observed within each specialty.
tokenized.data %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::summarise(n = dplyr::n_distinct(ngram))

# How often each unigram occurs, most frequent first.
word_counts <- dplyr::count(
  tokenized.data.unigram,
  word,
  name = "count",
  sort = TRUE
)

# For every occurrence count, how many distinct unigrams have that count.
count_distribution <- dplyr::count(word_counts, count, name = "num_words")

# Scatter plot of the frequency-of-frequencies table.
ggplot2::ggplot(
  count_distribution,
  ggplot2::aes(x = count, y = num_words)
) +
  ggplot2::geom_point() +
  ggplot2::labs(
    title = "Scatter Plot of Count Distribution",
    x = "Count of Unique Words",
    y = "Number of Words"
  )

# How often each bigram occurs, most frequent first.
# NOTE: this deliberately reuses the names `word_counts` and
# `count_distribution`, overwriting the unigram versions computed earlier.
word_counts <- tokenized.data %>%
  dplyr::group_by(ngram) %>%
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(count))

# For every occurrence count, how many distinct bigrams have that count.
count_distribution <- word_counts %>%
  dplyr::group_by(count) %>%
  dplyr::summarise(num_words = dplyr::n()) %>%
  dplyr::ungroup()

# Scatter plot of the frequency-of-frequencies table. The y axis counts
# bigrams here, so it is labelled accordingly.
ggplot2::ggplot(
  count_distribution,
  ggplot2::aes(x = count, y = num_words)
) +
  ggplot2::geom_point() +
  ggplot2::labs(
    title = "Scatter Plot of Count Distribution",
    x = "Count of Unique Bigrams",
    y = "Number of Bigrams"
  )

3 ###Unique bigrams per category


# Distinct bigram count per specialty: de-duplicate (specialty, bigram) pairs,
# then count the remaining rows in each specialty.
tokenized.data %>%
  dplyr::distinct(medical_specialty, ngram) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::summarise(n = dplyr::n())

##Unique Bigrams

##There are 55732 unique bigrams in the orthopedic specialty.

##There are 28297 unique bigrams in the radiology speciality.

##There are 130404 unique bigrams in the surgery speciality.

4

##Unique Sentences

# Rebuild `analysis.data` from sentence tokens instead of word tokens:
#   1. split each transcription into sentences,
#   2. strip everything except alphanumerics and whitespace,
#   3. drop sentences that contain a digit,
#   4. glue the surviving sentences back into one string per note,
#   5. re-attach the note metadata (all columns except the raw transcription).
#
# The former `cross_join(stop_words)` step is removed: a cross join pairs every
# sentence with every row of `stop_words`, so it removed no stop words and only
# repeated each sentence once per stop-word row before the paste() below.
#
# NOTE(review): step 2 also strips sentence-ending punctuation, so a later
# sentence re-tokenisation of `transcription` presumably cannot find sentence
# boundaries - confirm that the downstream "sentence" counts mean what is
# intended.
analysis.data <- filtered.data %>%
  tidytext::unnest_tokens(sentence, transcription, token = "sentences") %>%
  dplyr::mutate(
    sentence = stringr::str_replace_all(sentence, "[^[:alnum:]\\s]", "")
  ) %>%
  dplyr::filter(!stringr::str_detect(sentence, "[0-9]")) %>%
  dplyr::group_by(note_id) %>%
  dplyr::summarise(transcription = paste(sentence, collapse = " ")) %>%
  dplyr::left_join(
    dplyr::select(filtered.data, -transcription),
    by = "note_id"
  )
# Interactive help lookups kept from the original notebook.
help("cross_join")
help("str_detect")

# Sentence-level tokens of the cleaned transcriptions (the token column keeps
# the name `ngram`).
tokenized.data.sentence <- tidytext::unnest_tokens(
  analysis.data,
  ngram,
  transcription,
  token = "sentences",
  to_lower = TRUE
)

# Number of sentence rows per specialty.
dplyr::count(tokenized.data.sentence, medical_specialty, name = "n")

##Unique Sentences

##There are 350 unique sentences in the orthopedic specialty.

##There are 262 unique sentences in the radiology speciality.

##There are 1085 unique sentences in the surgery speciality.

###Words per Category

# Five most frequent bigrams in each specialty (ties at the cut-off are kept).
# `slice_max()` replaces the superseded `top_n()`, and the ranking column is
# named explicitly instead of relying on "last column" selection.
tokenized.data %>%
  dplyr::count(medical_specialty, ngram, sort = TRUE) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::slice_max(order_by = n, n = 5) %>%
  dplyr::ungroup()
Selecting by n

5

##Use of a Lemmatizer

#A general purpose lemmatizer may not work well for medical data. This is because medical data contains highly specialized terms, and a lemmatizer needs to be trained on such data to tokenize and lemmatize those terms accurately. Some specific issues include:

#a. Medical data usually contains specialized terms, drug names, and jargon. Therefore, a general purpose tool may not have knowledge of these terms and may not be proficient in accurately identifying the lemmas.

#b. Medical terms typically come from different parts of speech such as nouns, verbs, and adjectives. Since the process of lemmatizing requires mapping each word form to its correct lemma, general purpose lemmatizers (which have not been trained on medical data) may not process the variations in medical speech effectively.

# Lemmatize the bigram tokens. `lemmatize_words()` looks each element up as a
# single word, so a two-word string such as a bigram never matches and comes
# back unchanged; `lemmatize_strings()` lemmatizes every word inside the
# string instead.
lemmatized.data <- tokenized.data %>%
  dplyr::mutate(lemma = textstem::lemmatize_strings(ngram))

# Relative frequency of each lemma within its specialty, reshaped so that the
# Orthopedic proportion sits in its own column next to the proportion of each
# comparison specialty (Surgery, Radiology).
# The raw count `n` is dropped before widening: if it stays, pivot_wider()
# treats it as an identifier column and a lemma only shares a row across
# specialties when its raw counts happen to be equal.
lemma.freq <- lemmatized.data %>%
  dplyr::count(medical_specialty, lemma) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::mutate(proportion = n / sum(n)) %>%
  dplyr::ungroup() %>%
  dplyr::select(-n) %>%
  tidyr::pivot_wider(
    names_from = medical_specialty,
    values_from = proportion
  ) %>%
  tidyr::pivot_longer(
    c(Surgery, Radiology),
    names_to = "medical_specialty",
    values_to = "proportion"
  )

# Log-log scatter of lemma proportions: Orthopedic (y) against each comparison
# specialty (x, one facet each). The dashed line marks equal proportions.
ggplot2::ggplot(
  lemma.freq,
  ggplot2::aes(
    x = proportion,
    y = `Orthopedic`,
    color = abs(`Orthopedic` - proportion)
  )
) +
  ggplot2::geom_abline(color = "gray40", lty = 2) +
  ggplot2::geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  ggplot2::geom_text(
    ggplot2::aes(label = lemma),
    check_overlap = TRUE,
    vjust = 1.5
  ) +
  ggplot2::scale_x_log10(labels = scales::percent_format()) +
  ggplot2::scale_y_log10(labels = scales::percent_format()) +
  ggplot2::scale_color_gradient(
    limits = c(0, 0.001),
    low = "darkslategray4",
    high = "gray75"
  ) +
  ggplot2::facet_wrap(~medical_specialty, ncol = 2) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::labs(y = "Orthopedic", x = NULL)

6

##Analyzing relative proportions

#This plot visualizes the same relative proportion of lemmas in each speciality. Based on these specialties, I would not expect to see the exact same relative proportions. There are some differences in the relationship between orthopaedics and radiology, and orthopaedics and surgery:

#Orthopaedics and surgery are more likely to have a high degree of common lemmas. Both specialties are based on standard anatomical terms.

#Orthopaedics and radiology are less likely to have a high degree of common lemmas. There is definitely overlap; however, radiology may refer more to the technique of medical imaging. Therefore, many of the anatomical terms, diagnoses, and treatments related to orthopaedics may be missed.

7

##Direct comparison of surgery and radiology

library(dplyr)
library(tidyr)
library(ggplot2)
library(textstem)
# Lemmatize the bigram tokens. `lemmatize_words()` looks each element up as a
# single word, so a two-word string such as a bigram never matches and comes
# back unchanged; `lemmatize_strings()` lemmatizes every word inside the
# string instead.
lemmatized.data <- tokenized.data %>%
  dplyr::mutate(lemma = textstem::lemmatize_strings(ngram))

# Relative frequency of each lemma within its specialty, reshaped so that the
# Surgery proportion sits in its own column next to the proportion of each
# comparison specialty (Orthopedic, Radiology).
# The raw count `n` is dropped before widening: if it stays, pivot_wider()
# treats it as an identifier column and a lemma only shares a row across
# specialties when its raw counts happen to be equal.
lemma.freq <- lemmatized.data %>%
  dplyr::count(medical_specialty, lemma) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::mutate(proportion = n / sum(n)) %>%
  dplyr::ungroup() %>%
  dplyr::select(-n) %>%
  tidyr::pivot_wider(
    names_from = medical_specialty,
    values_from = proportion
  ) %>%
  tidyr::pivot_longer(
    c(Orthopedic, Radiology),
    names_to = "medical_specialty",
    values_to = "proportion"
  )

# Log-log scatter of lemma proportions: Surgery (y) against each comparison
# specialty (x, one facet each). The dashed line marks equal proportions.
ggplot2::ggplot(
  lemma.freq,
  ggplot2::aes(
    x = proportion,
    y = `Surgery`,
    color = abs(`Surgery` - proportion)
  )
) +
  ggplot2::geom_abline(color = "gray40", lty = 2) +
  ggplot2::geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  ggplot2::geom_text(
    ggplot2::aes(label = lemma),
    check_overlap = TRUE,
    vjust = 1.5
  ) +
  ggplot2::scale_x_log10(labels = scales::percent_format()) +
  ggplot2::scale_y_log10(labels = scales::percent_format()) +
  ggplot2::scale_color_gradient(
    limits = c(0, 0.001),
    low = "darkslategray4",
    high = "gray75"
  ) +
  ggplot2::facet_wrap(~medical_specialty, ncol = 2) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::labs(y = "Surgery", x = NULL)

###TF-IDF Normalization

# Occurrences of each lemma within each specialty.
lemma.counts <- lemmatized.data %>%
  dplyr::count(medical_specialty, lemma)

# Total number of lemma occurrences per specialty.
total.counts <- lemma.counts %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::summarise(total = sum(n))

# Attach the per-specialty total to every (specialty, lemma) row. The key is
# named explicitly so the join cannot silently pick up other shared columns.
all.counts <- dplyr::left_join(
  lemma.counts,
  total.counts,
  by = "medical_specialty"
)
Joining with `by = join_by(medical_specialty)`
# Add tf, idf and tf-idf columns, treating each specialty as one "document"
# and each lemma as a term.
all.counts.tfidf <- all.counts %>%
  tidytext::bind_tf_idf(term = lemma, document = medical_specialty, n = n)

# Ten highest tf-idf lemmas within each specialty.
all.counts.tfidf %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::slice_max(order_by = tf_idf, n = 10)

8 ##Stand out lemmas

#The lemmas that stand out in these lists are “admission”, “diagnosis”, “chief”, and “complaint”. Orthopedics often includes admission and diagnosis of patients based on their presenting complaint, so that a treatment modality can be selected.

# Show the first note whose cleaned transcription matches the pattern.
# NOTE(review): the pattern is interpreted as a regular expression, so each
# "." matches any single character rather than a literal dot - confirm this
# is intended.
analysis.data %>%
  dplyr::filter(stringr::str_detect(transcription, 'b.i.d')) %>%
  dplyr::select(medical_specialty, transcription) %>%
  dplyr::slice(1)

9

##Extracting unusual top lemma

# Show the first note whose cleaned transcription contains "atv".
analysis.data %>%
  dplyr::filter(stringr::str_detect(transcription, 'atv')) %>%
  dplyr::select(medical_specialty, transcription) %>%
  dplyr::slice(1)
LS0tCnRpdGxlOiAiS2FtaW5kYSAtIFByYWN0aWNhbCAyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKYGBge3J9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKbGlicmFyeShyZWFkcikKbGlicmFyeShkcGx5cikKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHNjYWxlcykKYGBgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygidGlkeXRleHQiKQppbnN0YWxsLnBhY2thZ2VzKCJ0ZXh0c3RlbSIpCmluc3RhbGwucGFja2FnZXMoImNsaW5zcGFjeSIpCmluc3RhbGwucGFja2FnZXMoInRvcGljbW9kZWxzIikKaW5zdGFsbC5wYWNrYWdlcygicmVzaGFwZTIiKQppbnN0YWxsLnBhY2thZ2VzKCJzdHJpbmdyIikKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh0ZXh0c3RlbSkKbGlicmFyeShjbGluc3BhY3kpCmxpYnJhcnkodG9waWNtb2RlbHMpCmxpYnJhcnkocmVzaGFwZTIpCmxpYnJhcnkoc3RyaW5ncikKYGBgCgojIyNEYXRhIFBhcnNpbmcKCmBgYHtyfQpyYXcuZGF0YSA8LSBjbGluc3BhY3k6OmRhdGFzZXRfbXRzYW1wbGVzKCkKZHBseXI6OmdsaW1wc2UocmF3LmRhdGEpCmBgYAoqKjEqKiAKCiMjI0RhdGEgRGVzY3JpcHRpb24KCiMjTm90ZUlEIC0gVGhpcyB2YXJpYWJsZSBpcyB0aGUgdW5pcXVlIElEIGZvciBlYWNoIG5vdGUuCgojI0Rlc2NyaXB0aW9uIC0gVGhpcyBvdXRwdXQgcHJvdmlkZXMgYSBzdW1tYXJ5IG9mIHRoZSBTT0FQIG5vdGVzLgoKIyNNZWRpY2FsX1NwZWNpYWx0eSAtIFRoaXMgdmFyaWFibGUgcmVmZXJzIHRvIHRoZSBtZWRpY2FsIHNwZWNpYWxpdHkgdGhlIHBhdGllbnQgaXMgdmlzaXRpbmcuCgojI1NhbXBsZV9OYW1lIC0gVGhpcyB2YXJpYWJsZSByZWZlcnMgdG8gdGhlIHByb2NlZHVyZXMgdW5kZXJnb25lIGZvciBlYWNoIHBhdGllbnQuCgojI1RyYW5zY3JpcHRpb24gLSBUaGlzIG91dHB1dCBwcm92aWRlcyBhIGZ1bGwgdHJhbnNjcmlwdCBvZiB0aGUgcGh5c2ljaWFucyBTT0FQIG5vdGVzLgoKIyNLZXl3b3JkcyAtIFRoaXMgb3V0cHV0IGNvbGxlY3RzIGtleXdvcmRzIGZyb20gdGhlIE1lZGljYWxfU3BlY2lhbGl0eSwgU2FtcGxlX05hbWUsIGFuZCBUcmFuc2NyaXB0aW9uIHZhcmlhYmxlcy4KCgoKYGBge3IgcmF3ZGF0YSBtZWRpY2FsIHNwZWNpYWxpdGllc30KCnJhdy5kYXRhICU+JSBkcGx5cjo6c2VsZWN0KG1lZGljYWxfc3BlY2lhbHR5KSAlPiUgZHBseXI6Om5fZGlzdGluY3QoKQpgYGAKCiMjI1RyYW5zY3JpcHRzIHBlciBzcGVjaWFsdHkKCmBgYHtyfQpnZ3Bsb3QyOjpnZ3Bsb3QocmF3LmRhdGEsIGdncGxvdDI6OmFlcyh5PW1lZGljYWxfc3BlY2lhbHR5KSkgKyBnZ3Bsb3QyOjpnZW9tX2JhcigpICsgbGFicyh4PSJEb2N1bWVudCBDb3VudCIsIHk9Ik1lZGljYWwgU3BlY2lhbGl0eSIpCmBgYAoKYGBge3J9CmZpbHRlcmVkLmRhdGEgPC0gcmF3LmRhdGEgJT4lIGRwbHlyOjpmaWx0ZXIobWVkaWNhbF9zcGVjaWFs
dHkgJWluJSBjKCJPcnRob3BlZGljIiwgIlJhZGlvbG9neSIsICJTdXJnZXJ5IikpIApgYGAKCgojIyNUZXh0IFByb2Nlc3NpbmcKCmBgYHtyIHRleHQgcHJvY2Vzc2luZ30KCmFuYWx5c2lzLmRhdGEgPC0gZmlsdGVyZWQuZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRyYW5zY3JpcHRpb24pICU+JQogIG11dGF0ZSh3b3JkID0gc3RyX3JlcGxhY2VfYWxsKHdvcmQsICJbXls6YWxudW06XV0iLCAiIikpICU+JQogIGZpbHRlcighc3RyX2RldGVjdCh3b3JkLCAiWzAtOV0iKSkgJT4lCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JQogIGdyb3VwX2J5KG5vdGVfaWQpICU+JQogIHN1bW1hcmlzZSh0cmFuc2NyaXB0aW9uID0gcGFzdGUod29yZCwgY29sbGFwc2UgPSAiICIpKSAlPiUKICBsZWZ0X2pvaW4oc2VsZWN0KGZpbHRlcmVkLmRhdGEsIC10cmFuc2NyaXB0aW9uKSwgYnkgPSAibm90ZV9pZCIpCmBgYAoKCmBgYHtyfQp0b2tlbml6ZWQuZGF0YS51bmlncmFtIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKHdvcmQsIHRyYW5zY3JpcHRpb24sIHRvX2xvd2VyPVRSVUUpCmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKG5ncmFtLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJuZ3JhbXMiLCBuPTIsIHRvX2xvd2VyID0gVFJVRSkKYGBgCgoqKjIqKgoKIyMjVW5pcXVlIFRva2VucyBwZXIgU3BlY2lhbGl0eQoKYGBge3IgdG9rZW4gdW5pZ3JhbX0KdG9rZW5pemVkLmRhdGEudW5pZ3JhbSAlPiUgZHBseXI6Omdyb3VwX2J5KG1lZGljYWxfc3BlY2lhbHR5KSAlPiUgZHBseXI6OmRpc3RpbmN0KHdvcmQpICU+JSBkcGx5cjo6c3VtbWFyaXNlKG49ZHBseXI6Om4oKSkKYGBgCiMjVW5pcXVlIFVuaWdyYW1zCgojI1RoZXJlIGFyZSA3NjgyIHVuaXF1ZSB1bmlncmFtcyBpbiB0aGUgb3J0aG9wZWRpYyBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgNTkzNSB1bmlxdWUgdW5pZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTE5NzcgdW5pcXVlIHVuaWdyYW1zIGluIHRoZSBzdXJnZXJ5IHNwZWNpYWxpdHkuCgoKYGBge3IgdG9rZW4gYmlncmFtfQoKdG9rZW5pemVkLmRhdGEgJT4lIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lIGRwbHlyOjpkaXN0aW5jdChuZ3JhbSkgJT4lIGRwbHlyOjpzdW1tYXJpc2Uobj1kcGx5cjo6bigpKQpgYGAKCgpgYGB7ciB1bmlncmFtIHRva2VuIGRpc3RyaWJ1dGlvbn0KCndvcmRfY291bnRzIDwtIHRva2VuaXplZC5kYXRhLnVuaWdyYW0gJT4lCiAgICBncm91cF9ieSh3b3JkKSAlPiUKICAgIHN1bW1hcmlzZShjb3VudCA9IG4oKSkgJT4lCiAgICB1bmdyb3VwKCkgJT4lCiAgICBhcnJhbmdlKGRlc2MoY291bnQpKQoKY291bnRfZGlzdHJpYnV0aW9uIDwtIHdvcmRfY291bnRzICU+JQogIGdyb3VwX2J5KGNvdW50KSAlPiUKICBzdW1tYXJp
c2UobnVtX3dvcmRzID0gbigpKSAlPiUKICB1bmdyb3VwKCkKIAogZ2dwbG90Mjo6Z2dwbG90KGNvdW50X2Rpc3RyaWJ1dGlvbiwgYWVzKHggPSBjb3VudCwgeSA9IG51bV93b3JkcykpICsKICBnZW9tX3BvaW50KCkgKwogIGxhYnModGl0bGUgPSAiU2NhdHRlciBQbG90IG9mIENvdW50IERpc3RyaWJ1dGlvbiIsCiAgICAgICB4ID0gIkNvdW50IG9mIFVuaXF1ZSBXb3JkcyIsCiAgICAgICB5ID0gIk51bWJlciBvZiBXb3JkcyIpCmBgYAoKYGBge3IgYmlncmFtIHRva2VuIGRpc3RyaWJ1dGlvbn0Kd29yZF9jb3VudHMgPC0gdG9rZW5pemVkLmRhdGEgJT4lCiAgICBncm91cF9ieShuZ3JhbSkgJT4lCiAgICBzdW1tYXJpc2UoY291bnQgPSBuKCkpICU+JQogICAgdW5ncm91cCgpICU+JQogICAgYXJyYW5nZShkZXNjKGNvdW50KSkKCmNvdW50X2Rpc3RyaWJ1dGlvbiA8LSB3b3JkX2NvdW50cyAlPiUKICBncm91cF9ieShjb3VudCkgJT4lCiAgc3VtbWFyaXNlKG51bV93b3JkcyA9IG4oKSkgJT4lCiAgdW5ncm91cCgpCiAKIGdncGxvdDI6OmdncGxvdChjb3VudF9kaXN0cmlidXRpb24sIGFlcyh4ID0gY291bnQsIHkgPSBudW1fd29yZHMpKSArCiAgZ2VvbV9wb2ludCgpICsKICBsYWJzKHRpdGxlID0gIlNjYXR0ZXIgUGxvdCBvZiBDb3VudCBEaXN0cmlidXRpb24iLAogICAgICAgeCA9ICJDb3VudCBvZiBVbmlxdWUgQmlncmFtcyIsCiAgICAgICB5ID0gIk51bWJlciBvZiBXb3JkcyIpCmBgYAoKKiozKioKIyMjVW5pcXVlIGJpZ3JhbXMgcGVyIGNhdGVnb3J5CgoKYGBge3IgdG9rZW4gYmlncmFtMn0KCnRva2VuaXplZC5kYXRhICU+JSBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSBkcGx5cjo6ZGlzdGluY3QobmdyYW0pICU+JSBkcGx5cjo6c3VtbWFyaXNlKG49ZHBseXI6Om4oKSkKYGBgCgoKIyNVbmlxdWUgQmlncmFtcwoKIyNUaGVyZSBhcmUgNTU3MzIgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIG9ydGhvcGVkaWMgc3BlY2lhbHR5LgoKIyNUaGVyZSBhcmUgMjgyOTcgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTMwNDA0IHVuaXF1ZSBiaWdyYW1zIGluIHRoZSBzdXJnZXkgc3BlY2lhbGl0eS4KCgoqKjQqKgoKIyNVbmlxdWUgU2VudGVuY2VzCgpgYGB7ciBzZW50ZW5jZXN9CmFuYWx5c2lzLmRhdGEgPC0gZmlsdGVyZWQuZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHNlbnRlbmNlLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJzZW50ZW5jZXMiKSAlPiUKICBtdXRhdGUoc2VudGVuY2UgPSBzdHJfcmVwbGFjZV9hbGwoc2VudGVuY2UsICJbXls6YWxudW06XVxcc10iLCAiIikpICU+JQogIGZpbHRlcighc3RyX2RldGVjdChzZW50ZW5jZSwgIlswLTldIikpICU+JQogIGNyb3NzX2pvaW4oc3RvcF93b3JkcykgJT4lCiAgZ3JvdXBfYnkobm90ZV9pZCkgJT4lCiAgc3VtbWFyaXNlKHRyYW5zY3JpcHRpb24gPSBwYXN0ZShzZW50ZW5jZSwgY29sbGFwc2UgPSAiICIp
KSAlPiUKICBsZWZ0X2pvaW4oc2VsZWN0KGZpbHRlcmVkLmRhdGEsIC10cmFuc2NyaXB0aW9uKSwgYnkgPSAibm90ZV9pZCIpCmBgYAoKYGBge3J9Cj9jcm9zc19qb2luCmBgYAoKYGBge3J9Cj9zdHJfZGV0ZWN0CmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhLnNlbnRlbmNlIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKG5ncmFtLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJzZW50ZW5jZXMiLCB0b19sb3dlciA9IFRSVUUpCmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhLnNlbnRlbmNlICU+JQogIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lCiAgZHBseXI6OmNvdW50KG5hbWUgPSAibiIpICU+JQogIGRwbHlyOjp1bmdyb3VwKCkKYGBgCgojI1VuaXF1ZSBTZW50ZW5jZXMKCiMjVGhlcmUgYXJlIDM1MCB1bmlxdWUgYmlncmFtcyBpbiB0aGUgb3J0aG9wZWRpYyBzcGVjaWFsdHkuCgojI1RoZXJlIGFyZSAyNjIgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTA4NSB1bmlxdWUgYmlncmFtcyBpbiB0aGUgc3VyZ2V5IHNwZWNpYWxpdHkuCgojIyNXb3JkcyBwZXIgQ2F0ZWdvcnkKCmBgYHtyfQp0b2tlbml6ZWQuZGF0YSAlPiUKICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JQogIGRwbHlyOjpjb3VudChuZ3JhbSwgc29ydCA9IFRSVUUpICU+JQogIGRwbHlyOjp0b3Bfbig1KQpgYGAKCioqNSoqIAoKIyNVc2Ugb2YgYSBMZW1tYXRpemVyCgojQSBnZW5lcmFsIHB1cnBvc2UgbGVtbWF0aXplciBtYXkgbm90IHdvcmsgd2VsbCBmb3IgbWVkaWNhbCBkYXRhLiBUaGlzIGlzIGJlY2F1c2UgbWVkaWNhbCBkYXRhIGNvbnRhaW5zIGhpZ2hseSBzcGVjaWFsaXplZCB0ZXJtcyB0aGF0IHJlcXVpcmUgYWNjdXJhdGVseSB0cmFpbmVkIG1ldGhvZHMgdG8gYmUgdHJhaW5lZCB0byBhY2N1cmF0ZWx5IHRva2VuIHRlcm1zLiBTb21lIHNwZWNpZmljIGlzc3VlcyBpbmNsdWRlOgoKI2EuIE1lZGljYWwgZGF0YSB1c3VhbGx5IGNvbnRhaW5zIHNwZWNpYWxpemVkIHRlcm1zLCBkcnVncyBuYW1lcywgYW5kIGphcmdvbi4gVGhlcmVmb3JlLCBhIGdlbmVyYWwgcHVycG9zZSB0b29sIG1heSBub3QgaGF2ZSB0aGUga25vd2xlZGdlIG9mIHRoZXNlIHRlcm1zIGFuZCBtYXkgbm90IGJlIHByb2ZpY2llbnQgaW4gYWNjdXJhdGVseSBpZGVudGZ5aW5nIHRoZSBsZW1tYXMuCgojYi4gTWVkaWNhbCB0ZXJtcyB0eXBpY2FsbHkgY29tZSBmcm9tIGRpZmZlcmVudCBwYXJ0cyBvZiBzcGVlY2ggc3VjaCBhcyBub3VucywgdmVyYnMsIGFuZCBhZGplY3RpdmVzLiBTaW5jZSB0aGUgcHJvY2VzcyBvZiBsZW1tYXRpemluZyByZXF1aXJlcyBtYXBwaW5nIHRvIGdlbmVyYXRlIGNvcnJlY3QgbGVtbWFzLCBnZW5lcmFsIHB1cnBvc2UgbGVtbWFzICh3aGljaCBoYXZlIG5vdCBiZWVuIHRyYWluZWQgb24gbWVkaWNhbCBkYXRhKSBtYXkgbm90IHBy
b2Nlc3MgdGhlIHZhcmlhdGlvbnMgaW4gbWVkaWNhbCBzcGVlY2ggZWZmZWN0aXZlbHkuCgoKYGBge3IgbGVtbWF0aXplcn0KbGVtbWF0aXplZC5kYXRhIDwtIHRva2VuaXplZC5kYXRhICU+JSBkcGx5cjo6bXV0YXRlKGxlbW1hPXRleHRzdGVtOjpsZW1tYXRpemVfd29yZHMobmdyYW0pKQpgYGAKCgpgYGB7cn0KbGVtbWEuZnJlcSA8LSBsZW1tYXRpemVkLmRhdGEgJT4lIAogIGRwbHlyOjpjb3VudChtZWRpY2FsX3NwZWNpYWx0eSwgbGVtbWEpICU+JQogIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lIAogIGRwbHlyOjptdXRhdGUocHJvcG9ydGlvbiA9IG4gLyBzdW0obikpICU+JQogIHRpZHlyOjpwaXZvdF93aWRlcihuYW1lc19mcm9tID0gbWVkaWNhbF9zcGVjaWFsdHksIHZhbHVlc19mcm9tID0gcHJvcG9ydGlvbikgJT4lCiAgdGlkeXI6OnBpdm90X2xvbmdlcihgU3VyZ2VyeWA6YFJhZGlvbG9neWAsCiAgICAgICAgICAgICAgIG5hbWVzX3RvID0gIm1lZGljYWxfc3BlY2lhbHR5IiwgdmFsdWVzX3RvID0gInByb3BvcnRpb24iKQpgYGAKCgpgYGB7cn0KZ2dwbG90Mjo6Z2dwbG90KGxlbW1hLmZyZXEsIGdncGxvdDI6OmFlcyh4PXByb3BvcnRpb24sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHk9YE9ydGhvcGVkaWNgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbG9yPWFicyhgT3J0aG9wZWRpY2AgLSBwcm9wb3J0aW9uKSkpICsgCiAgZ2dwbG90Mjo6Z2VvbV9hYmxpbmUoY29sb3I9ImdyYXk0MCIsIGx0eT0yKSArCiAgZ2dwbG90Mjo6Z2VvbV9qaXR0ZXIoYWxwaGE9MC4xLCBzaXplPTIuNSwgd2lkdGg9MC4zLCBoZWlnaHQ9MC4zKSArCiAgZ2dwbG90Mjo6Z2VvbV90ZXh0KGdncGxvdDI6OmFlcyhsYWJlbD1sZW1tYSksIGNoZWNrX292ZXJsYXA9VFJVRSwgdmp1c3Q9MS41KSArCiAgZ2dwbG90Mjo6c2NhbGVfeF9sb2cxMChsYWJlbHM9c2NhbGVzOjpwZXJjZW50X2Zvcm1hdCgpKSArIAogIGdncGxvdDI6OnNjYWxlX3lfbG9nMTAobGFiZWxzPXNjYWxlczo6cGVyY2VudF9mb3JtYXQoKSkgKyAKICBnZ3Bsb3QyOjpzY2FsZV9jb2xvcl9ncmFkaWVudChsaW1pdHM9YygwLCAwLjAwMSksIGxvdz0iZGFya3NsYXRlZ3JheTQiLCBoaWdoPSJncmF5NzUiKSArCiAgZ2dwbG90Mjo6ZmFjZXRfd3JhcCh+bWVkaWNhbF9zcGVjaWFsdHksIG5jb2wgPSAyKSArCiAgZ2dwbG90Mjo6dGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikgKwogIGdncGxvdDI6OiBsYWJzKHk9Ik9ydGhvcGVkaWMiLCB4ID0gTlVMTCkKYGBgCgoqKjYqKgoKIyNBbmFseXppbmcgcmVsYXRpdmUgcHJvcG9ydGlvbnMgCgojVGhpcyBwbG90IHZpc3VhbGl6ZXMgdGhlIHNhbWUgcmVsYXRpdmUgcHJvcG9ydGlvbiBvZiBsZW1tYXMgaW4gZWFjaCBzcGVjaWFsaXR5LiBCYXNlZCBvbiB0aGVzZSBzcGVjaWFsdGllcywgSSB3b3VsZCBub3QgZXhwZWN0IHRvIHNlZSB0aGUgZXhhY3Qgc2Ft
ZSByZWxhdGl2ZSBwcm9wb3J0aW9ucy4gVGhlcmUgYXJlIHNvbWUgZGlmZmVyZW5jZXMgaW4gdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIG9ydGhvcGFlZGljcyBhbmQgcmFkaW9sb2d5LCBhbmQgb3J0aG9wYWVkaWNzIGFuZCBzdXJnZXJ5OgoKI09ydGhvcGFlZGljcyBhbmQgc3VyZ2VyeSBhcmUgbW9yZSBsaWtlbHkgdG8gaGF2ZSBhIGhpZ2ggZGVncmVlIG9mIGNvbW1vbiBsZW1tYXMuIEJvdGggc3BlY2lhbHRpZXMgYXJlIGJhc2VkIG9uIHN0YW5kYXJkIGFuYXRvbWljYWwgdGVybXMuCgojT3J0aG9wYWVkaWNzIGFuZCByYWRpb2xvZ3kgYXJlIGxlc3MgbGlrZWx5IHRvIGhhdmUgYSBoaWdoIGRlZ3JlZSBvZiBjb21tb24gbGVtbWFzLiBUaGVyZSBpcyBkZWZpbml0ZWx5IG92ZXJsYXAgaG93ZXZlciwgcmFkaW9sb2d5IG1heSByZWZlciBtb3JlIHRvIHRoZSB0ZWNobmlxdWUgb2YgbWVkaWNhbCBpbWFnaW5nLiBUaGVyZWZvcmUsIG1hbnkgb2YgdGhlIGFuYXRvbWljYWwgdGVybXMsIGRpYWdub3NlcywgYW5kIHRyZWF0bWVudHMgcmVsYXRlZCB0byBvcnRob3BhZWRpY3MgbWF5IGJlIG1pc3NlZC4KCioqNyoqCgojI0RpcmVjdCBjb21wYXJpc29uIG9mIHN1cmdlcnkgYW5kIHJhZGlvbG9neQoKYGBge3J9CmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0ZXh0c3RlbSkKYGBgCgoKYGBge3J9CmxlbW1hdGl6ZWQuZGF0YSA8LSB0b2tlbml6ZWQuZGF0YSAlPiUgZHBseXI6Om11dGF0ZShsZW1tYT10ZXh0c3RlbTo6bGVtbWF0aXplX3dvcmRzKG5ncmFtKSkKYGBgCgoKYGBge3J9CmxlbW1hLmZyZXEgPC0gbGVtbWF0aXplZC5kYXRhICU+JSAKICBkcGx5cjo6Y291bnQobWVkaWNhbF9zcGVjaWFsdHksIGxlbW1hKSAlPiUKICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSAKICBkcGx5cjo6bXV0YXRlKHByb3BvcnRpb24gPSBuIC8gc3VtKG4pKSAlPiUKICB0aWR5cjo6cGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IG1lZGljYWxfc3BlY2lhbHR5LCB2YWx1ZXNfZnJvbSA9IHByb3BvcnRpb24pICU+JQogIHRpZHlyOjpwaXZvdF9sb25nZXIoYE9ydGhvcGVkaWNgOmBSYWRpb2xvZ3lgLAogICAgICAgICAgICAgICBuYW1lc190byA9ICJtZWRpY2FsX3NwZWNpYWx0eSIsIHZhbHVlc190byA9ICJwcm9wb3J0aW9uIikKYGBgCgpgYGB7cn0KZ2dwbG90Mjo6Z2dwbG90KGxlbW1hLmZyZXEsIGdncGxvdDI6OmFlcyh4PXByb3BvcnRpb24sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHk9YFN1cmdlcnlgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbG9yPWFicyhgU3VyZ2VyeWAgLSBwcm9wb3J0aW9uKSkpICsgCiAgZ2dwbG90Mjo6Z2VvbV9hYmxpbmUoY29sb3I9ImdyYXk0MCIsIGx0eT0yKSArCiAgZ2dwbG90Mjo6Z2VvbV9qaXR0ZXIoYWxwaGE9MC4xLCBzaXplPTIuNSwgd2lkdGg9MC4zLCBoZWlnaHQ9MC4zKSAr
CiAgZ2dwbG90Mjo6Z2VvbV90ZXh0KGdncGxvdDI6OmFlcyhsYWJlbD1sZW1tYSksIGNoZWNrX292ZXJsYXA9VFJVRSwgdmp1c3Q9MS41KSArCiAgZ2dwbG90Mjo6c2NhbGVfeF9sb2cxMChsYWJlbHM9c2NhbGVzOjpwZXJjZW50X2Zvcm1hdCgpKSArIAogIGdncGxvdDI6OnNjYWxlX3lfbG9nMTAobGFiZWxzPXNjYWxlczo6cGVyY2VudF9mb3JtYXQoKSkgKyAKICBnZ3Bsb3QyOjpzY2FsZV9jb2xvcl9ncmFkaWVudChsaW1pdHM9YygwLCAwLjAwMSksIGxvdz0iZGFya3NsYXRlZ3JheTQiLCBoaWdoPSJncmF5NzUiKSArCiAgZ2dwbG90Mjo6ZmFjZXRfd3JhcCh+bWVkaWNhbF9zcGVjaWFsdHksIG5jb2wgPSAyKSArCiAgZ2dwbG90Mjo6dGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikgKwogIGdncGxvdDI6OiBsYWJzKHk9IlN1cmdlcnkiLCB4ID0gTlVMTCkKYGBgCgojIyNURi1JREYgTm9ybWFpbGl6YXRpb24KCmBgYHtyIGxlbW1hIGNvdW50c30KbGVtbWEuY291bnRzIDwtIGxlbW1hdGl6ZWQuZGF0YSAlPiUgZHBseXI6OmNvdW50KG1lZGljYWxfc3BlY2lhbHR5LCBsZW1tYSkKdG90YWwuY291bnRzIDwtIGxlbW1hLmNvdW50cyAlPiUgCiAgICAgICAgICAgICAgICAgICAgICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSAKICAgICAgICAgICAgICAgICAgICAgIGRwbHlyOjpzdW1tYXJpc2UodG90YWw9c3VtKG4pKQoKYWxsLmNvdW50cyA8LSBkcGx5cjo6bGVmdF9qb2luKGxlbW1hLmNvdW50cywgdG90YWwuY291bnRzKQpgYGAKCmBgYHtyfQphbGwuY291bnRzLnRmaWRmIDwtIHRpZHl0ZXh0OjpiaW5kX3RmX2lkZihhbGwuY291bnRzLCBsZW1tYSwgbWVkaWNhbF9zcGVjaWFsdHksIG4pIApgYGAKCmBgYHtyfQphbGwuY291bnRzLnRmaWRmICU+JSBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSBkcGx5cjo6c2xpY2VfbWF4KG9yZGVyX2J5PXRmX2lkZiwgbj0xMCkKYGBgCgoqKjgqKgojI1N0YW5kIG91dCBsZW1tYXMKCiNUaGUgbGVtbWFzIHRoYXQgc3RhbmQgb3V0IGluIHRoZXNlIGxpc3RzIGFyZSAiYWRtaXNzaW9uIiwgImRpYWdub3NpcyIsICJjaGllZiIsIGFuZCAiY29tcGxhaW50LiBPcnRob3BlZGljcyBvZnRlbiBpbmNsdWRlcyBhZG1pc3Npb24gYW5kIGRpYWdub3NpcyBvZiBwYXRpZW50cyBiYXNlZCBvbiB0aGVpciBwcmVzZW50aW5nIGNvbXBsYWludCwgc28gYSB0cmVhdG1lbnQgbW9kYWxsaXR5IGNhbiBiZSBzZWxlY3RlZC4KCgpgYGB7cn0KYW5hbHlzaXMuZGF0YSAlPiUgZHBseXI6OnNlbGVjdChtZWRpY2FsX3NwZWNpYWx0eSwgdHJhbnNjcmlwdGlvbikgJT4lIGRwbHlyOjpmaWx0ZXIoc3RyaW5ncjo6c3RyX2RldGVjdCh0cmFuc2NyaXB0aW9uLCAnYi5pLmQnKSkgJT4lIGRwbHlyOjpzbGljZSgxKQpgYGAKKio5KioKCiMjRXh0cmFjdGluZyB1bnVzdWFsIHRvcCBsZW1tYQoKCmBgYHtyfQphbmFseXNpcy5kYXRhICU+JSBkcGx5cjo6c2VsZWN0KG1lZGljYWxfc3BlY2lh
bHR5LCB0cmFuc2NyaXB0aW9uKSAlPiUgZHBseXI6OmZpbHRlcihzdHJpbmdyOjpzdHJfZGV0ZWN0KHRyYW5zY3JpcHRpb24sICdhdHYnKSkgJT4lIGRwbHlyOjpzbGljZSgxKQpgYGAKCg==